// tables.do

// ---------------------------------------------------------------------------
// Session setup: start from an empty session, close any open log, make sure
// the user-written commands used by this script are available, then load the
// analysis data.
// ---------------------------------------------------------------------------
clear all
capture log close

// User-written packages called in this script:
//   reghdfe, ivreghdfe       - (IV) regressions with absorbed fixed effects
//   ftools, ivreg2, ranktest - packages that reghdfe/ivreghdfe rely on
//   estout                   - provides eststo and esttab
//   fsum                     - descriptive statistics table
// Each package is installed from SSC only when it is not already on the
// adopath, so the script also runs offline once everything is installed and
// never stops on an already-installed (possibly different-version) package.
foreach pkg in ftools reghdfe ivreg2 ranktest ivreghdfe estout fsum {
	capture which `pkg'
	if _rc {
		ssc install `pkg'
	}
}

// NOTE: "projectfolder" is a placeholder; point it to the folder with the data.
cd "projectfolder"

use "analysis_data_organized.dta", clear

// Keep only observations with non-missing values on all variables of the
// regression below (complete cases); the regression output itself is not used.
qui reg EA_new firstborn ea_new_ukb_ld ea_new_23me_ukb_ld rank family_size last_child sex
keep if e(sample)

// Number of remaining observations per family (missing when famid is missing).
egen N_famid_new=count(famid) if famid!=., by(famid)
tab N_famid_new
// Keep only families with at least two members in the sample. Observations
// without a family id are dropped explicitly: N_famid_new is missing for them,
// so "drop if N_famid_new==1" alone would keep them in the descriptives and in
// the score standardization, although every regression clustered on famid
// discards them.
drop if N_famid_new==1 | missing(famid)
count

// Rescale each polygenic score to mean 0 and standard deviation 1, using the
// observations currently in memory.
global pgs_new ea_new_ukb_ld ea_new_ukb_ld_0 ea_new_ukb_ld_1 ea_new_23me_ukb_ld ea_23me_ld
foreach score of global pgs_new {
	quietly summarize `score', detail
	replace `score' = (`score' - r(mean)) / r(sd)
}

// Sample size noted by the authors at this point: 14,850

* Specification check following Booth & Kee (2009): probit of an indicator
* for EA_new == 20 on birth rank plus covariates, then the average marginal
* effect of rank.
local bk_covars sex i.YoB i.MoB e_PC* family_size_imp
generate higher_edu = (EA_new == 20)
probit higher_edu rank `bk_covars'
margins, dydx(rank)

**************************************************************
* TABLE 1: DESCRIPTIVE STATISTICS ANALYSIS SAMPLE (N = 14,850)
**************************************************************
* Summary statistics of the outcome, birth-order variables, the polygenic
* score, family size and sex, requested with display format %12.3f.
* NOTE(review): fsum is not an official Stata command; presumably it comes
* from SSC and must be installed before this line runs - confirm.
fsum EA_new firstborn ea_new_ukb_ld rank family_size_imp last_child sex, format(%12.3f)

********************************************************************************
* TABLE 2. REGRESSIONS OF YEARS OF EDUCATION ON THE GENE-ENVIRONMENT INTERACTION 
********************************************************************************
// Additional scores used for ORIV: ea_new_ukb_ld_0 ea_new_ukb_ld_1
// Declare famid as the panel variable (ID as the second xtset variable) so
// that "xtreg ..., fe" below absorbs family fixed effects.
xtset famid ID
// Control variables shared by all Table 2 specifications.
global controls sex i.YoB i.MoB e_PC* family_size_imp

// Column 1: between-family OLS, no firstborn x PGS interaction
qui eststo ols_bf_noint: reg    EA_new i.firstborn ea_new_ukb_ld $controls, cluster(famid)
// Column 2: between-family OLS, with firstborn x PGS interaction
qui eststo ols_bf:       reg    EA_new i.firstborn ea_new_ukb_ld firstborn#c.ea_new_ukb_ld $controls, cluster(famid)
// Column 4: within-family OLS (family fixed effects), no interaction
qui eststo ols_wf_noint:   xtreg  EA_new i.firstborn ea_new_ukb_ld $controls, cluster(famid) fe
// Column 5: within-family OLS (family fixed effects), with interaction
qui eststo ols_wf:       xtreg  EA_new i.firstborn ea_new_ukb_ld firstborn#c.ea_new_ukb_ld $controls, cluster(famid) fe

// ORIV preparation
** The slope of one split-sample score on the other is used as the scaling
** factor (the authors treat it as the correlation between the two scores).
regress ea_new_ukb_ld_0 ea_new_ukb_ld_1
scalar corrscores = _b[ea_new_ukb_ld_1]

** For each split-sample score: divide by sqrt(scaling factor), then build
** its interaction with firstborn from the rescaled score.
forvalues s = 0/1 {
	quietly replace ea_new_ukb_ld_`s' = ea_new_ukb_ld_`s'/sqrt(corrscores)
	generate firstborn_`s' = firstborn*ea_new_ukb_ld_`s'
}

** Family fixed-effects regressions: full-sample score first, then each
** split-sample score with its pre-built interaction term.
quietly eststo fe_ukb: xtreg EA_new firstborn ea_new_ukb_ld c.firstborn#c.ea_new_ukb_ld $controls, fe cluster(famid)
forvalues s = 0/1 {
	quietly eststo fe_ukb`s': xtreg EA_new firstborn ea_new_ukb_ld_`s' firstborn_`s' $controls, fe cluster(famid)
}

** Family fixed-effects IV, one direction at a time: split score `s' (and its
** interaction) is instrumented by the other split score (and its interaction).
forvalues s = 0/1 {
	local z = 1 - `s'
	quietly eststo fe_ivukb`s': xtivreg EA_new firstborn (ea_new_ukb_ld_`s' firstborn_`s' = ea_new_ukb_ld_`z' firstborn_`z') $controls, fe vce(cluster famid)
}

// Column 3: ORIV without fixed effects
// The data are stacked twice (replicant = 0 for the original rows, 1 for the
// copies). In the original rows score _0 is the regressor and score _1 the
// instrument; in the copies the roles are swapped. The same holds for the
// interactions with firstborn. Everything happens inside preserve/restore.
preserve
expand 2, generate(replicant)
gen mainVar       = cond(replicant == 0, ea_new_ukb_ld_0, ea_new_ukb_ld_1)
gen mainVarint    = cond(replicant == 0, firstborn * ea_new_ukb_ld_0, firstborn * ea_new_ukb_ld_1)
gen instrument    = cond(replicant == 0, ea_new_ukb_ld_1, ea_new_ukb_ld_0)
gen instrumentint = cond(replicant == 0, firstborn * ea_new_ukb_ld_1, firstborn * ea_new_ukb_ld_0)

// Regression of the stacked score on its instrument, with a test of the instrument
reghdfe mainVar instrument $controls, absorb(replicant) cluster(famid ID)
testparm instrument

// ORIV estimate stored for the table; replicant effects are absorbed and saved
ivreghdfe EA_new (mainVar mainVarint = instrument instrumentint) firstborn $controls, absorb(replicant, save) cluster(famid ID)
estimates store bf_iv
sum __hdfe1__

// Same model with explicit replicant indicators instead of absorb(), run only
// to display the constant term needed for the table (result is not stored).
qui gen constant0 = replicant == 0
qui gen constant1 = replicant == 1
ivreghdfe EA_new (mainVar mainVarint = instrument instrumentint) firstborn $controls constant*, cluster(famid ID)
restore

// Column 6: ORIV with family fixed effects
// The data are stacked twice (replicant = 0 for the original rows, 1 for the
// copies). In the original rows score _0 is the regressor and score _1 the
// instrument; in the copies the roles are swapped. Everything happens inside
// preserve/restore, so the stacked variables do not persist.
preserve
expand 2, generate(replicant)

// One identifier per family-by-replicant cell, so that absorbing it gives each
// stacked copy its own set of family fixed effects. egen group() numbers the
// cells directly; building the id by concatenating famid and replicant into a
// string and converting it back to a long silently yields missing ids once the
// concatenated number no longer fits into a long.
egen long newidreal = group(famid replicant)
xtset newidreal

gen mainVar       = cond(replicant == 0, ea_new_ukb_ld_0, ea_new_ukb_ld_1)
gen mainVarint    = cond(replicant == 0, firstborn * ea_new_ukb_ld_0, firstborn * ea_new_ukb_ld_1)
gen instrument    = cond(replicant == 0, ea_new_ukb_ld_1, ea_new_ukb_ld_0)
gen instrumentint = cond(replicant == 0, firstborn * ea_new_ukb_ld_1, firstborn * ea_new_ukb_ld_0)

// Regression of the stacked score on its instrument, with a test of the instrument
reghdfe mainVar instrument $controls, absorb(newidreal) cluster(famid ID)
testparm instrument

// ORIV estimate stored for the table.
// NOTE(review): the "res save" sub-options inside absorb() and the xbd
// prediction below depend on the installed reghdfe/ivreghdfe version - confirm.
ivreghdfe EA_new (mainVar mainVarint = instrument instrumentint) firstborn $controls, absorb(newidreal, res save) cluster(famid ID) 
estimates store fe_iv
predict EA_pred, xbd
sum __hdfe1__
restore

* Between-family IV, one direction at a time: split score `s' (and its
* interaction with firstborn) is instrumented by the other split score (and
* its interaction). Results are stored as bf_ivukb0 and bf_ivukb1.
forvalues s = 0/1 {
	local z = 1 - `s'
	xi: ivreg EA_new firstborn (ea_new_ukb_ld_`s' firstborn_`s' = ea_new_ukb_ld_`z' firstborn_`z') $controls, cluster (famid)
	eststo bf_ivukb`s'
}


// Compile Table 2 from the stored estimates, in column order:
//   (1) ols_bf_noint (2) ols_bf (3) bf_iv (4) ols_wf_noint (5) ols_wf (6) fe_iv
// Control-variable coefficients are dropped from the display. The principal
// components are matched with e_PC*, the same pattern used in $controls, so
// the drop list matches the PCs however the part after "e_PC" is named.
esttab ols_bf_noint ols_bf bf_iv ols_wf_noint ols_wf fe_iv, noomitted nobase se star(* 0.10 ** 0.05 *** 0.01) ///
	   stats(N r2) drop(e_PC* *.MoB *.YoB sex family_size_imp ) title("Comparison of ORIV results to OLS") ///
	   se(3) b(3) ///
	   mtitles("OLS Between-family" "OLS Between-family" "ORIV Between-family" "OLS Within-family" "OLS Within-family" "ORIV Within-family") ///
	   addnotes("Robust standard errors in parentheses, clustered by family; Coefficients for the control variables (year and month of birth," ///
	   "gender and the first 40 PCs of the genetic relatedness matrix) are not displayed, but available upon request from the authors. We do not" ///
	   "report the R2 for the ORIV specifications in Column (3) and (6) given the differences in its interpretation and computation for the instrumental" ///
	   "variable type of regressions.") 
